In [30]:
import pandas as pd

import pandas as pd
#東京電力のオープンデータを読み込む
elec = pd.read_csv(
    "tepco.csv",
#日付と時間を結合したセルに変更する
    parse_dates={'datetime': ['DATE', 'TIME']})
#使用する文字列をアルファベットに変更する
columns={
    "東京エリア需要":"tky_dmnd",
    "火力":"thp",
    "水力":"hyd",
    "太陽光発電実績":"slr"
}
#再度読み込みを行う
elec.rename(columns=columns, inplace=True)
In [31]:
import plotly
plotly.offline.init_notebook_mode(connected=False)
#ラベルオプションの設定を行う
data = [plotly.graph_objs.Scatter(x=elec["datetime"], y=elec["tky_dmnd"], name="東京エリア需要")]
layout = plotly.graph_objs.Layout(
    title="東京エリア需要(1時間毎の時系列)",
    legend={"x":0.8, "y":0.1},
    xaxis={"title":"datetime"},
    yaxis={"title":"東京エリア需要(万kw)"}
)
fig = plotly.graph_objs.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, show_link=False)
In [32]:
thp = plotly.graph_objs.Scatter(x=elec["datetime"], y=elec["thp"], mode = "lines", name="火力", marker=dict(color="rgba(220,20,60,0.7)"))
hyd = plotly.graph_objs.Scatter(x=elec["datetime"], y=elec["hyd"], mode = "lines", name="水力", marker=dict(color="rgba(0,128,255,0.8)"))
slr = plotly.graph_objs.Scatter(x=elec["datetime"], y=elec["slr"], mode = "lines", name="太陽光", marker=dict(color="rgba(255,165,0,0.8)"))
#火力・水力・太陽光のデータの結合を行う
el = [thp, slr, hyd]

layout = plotly.graph_objs.Layout(
    title="発電量割合(1時間毎の時系列)",
    legend={"x":1.0, "y":1.0},
    xaxis={"title":"date_time"},
    yaxis={"title":"発電量(万kw)"}
)
fig = plotly.graph_objs.Figure(data=el, layout=layout)
plotly.offline.iplot(fig, show_link=False)
In [33]:
#気象庁のデータを読み出す

wh = pd.read_csv('wh.csv')
wh.head()
Out[33]:
date time temp rain sunshine
0 2016-04-01 01:00:00 13.3 0.0 0.0
1 2016-04-01 02:00:00 13.8 0.0 0.0
2 2016-04-01 03:00:00 13.0 0.0 0.0
3 2016-04-01 04:00:00 12.2 0.0 0.0
4 2016-04-01 05:00:00 11.2 0.0 0.0
In [34]:
#気象庁のデータの読み込みを行う
wh = pd.read_csv('wh.csv', parse_dates={'datetime': ['date', 'time']})
wh.rename(columns=columns, inplace=True)
wh.head()
Out[34]:
datetime temp rain sunshine
0 2016-04-01 01:00:00 13.3 0.0 0.0
1 2016-04-01 02:00:00 13.8 0.0 0.0
2 2016-04-01 03:00:00 13.0 0.0 0.0
3 2016-04-01 04:00:00 12.2 0.0 0.0
4 2016-04-01 05:00:00 11.2 0.0 0.0
In [35]:
#気温と電力需要の相関を作成するのに必要なカラムを抽出する。結合条件は日時
dat = pd.merge(elec, wh, how="inner")
dat = dat.loc[:,["datetime","tky_dmnd","temp","rain","sunshine"]]
dat.head()
Out[35]:
datetime tky_dmnd temp rain sunshine
0 2016-04-01 01:00:00 2433 13.3 0.0 0.0
1 2016-04-01 02:00:00 2393 13.8 0.0 0.0
2 2016-04-01 03:00:00 2375 13.0 0.0 0.0
3 2016-04-01 04:00:00 2390 12.2 0.0 0.0
4 2016-04-01 05:00:00 2467 11.2 0.0 0.0
In [36]:
cor_dmnd  = [plotly.graph_objs.Scatter(x=dat["temp"], y=dat["tky_dmnd"], mode = "markers", marker=dict(color="rgba(64,64,64,0.6)"))]
layout = plotly.graph_objs.Layout(
    title="電力需要と気温の関係",
    legend={"x":0.8, "y":0.1},
    xaxis={"title":"気温(℃)"},
    yaxis={"title":"電力需要(万kw)"}
)
fig = plotly.graph_objs.Figure(data=cor_dmnd , layout=layout)
plotly.offline.iplot(fig, show_link=False)
In [37]:
#東京の電力消費データと気温のデータの結合を行う
tokyo = elec.join(wh["temp"]).dropna().as_matrix()
tokyo
C:\Users\shoichi\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: FutureWarning:

Method .as_matrix will be removed in a future version. Use .values instead.

Out[37]:
array([[Timestamp('2016-04-01 00:00:00'), 2555, 0, ..., 201, 2555, 13.3],
       [Timestamp('2016-04-01 01:00:00'), 2433, 0, ..., 186, 2433, 13.8],
       [Timestamp('2016-04-01 02:00:00'), 2393, 0, ..., 180, 2393, 13.0],
       ...,
       [Timestamp('2017-03-31 21:00:00'), 3619, 0, ..., 193, 3619, 13.0],
       [Timestamp('2017-03-31 22:00:00'), 3463, 0, ..., 252, 3463, 12.8],
       [Timestamp('2017-03-31 23:00:00'), 3230, 0, ..., 238, 3230, 12.2]],
      dtype=object)
In [137]:
tokyo_elec = tokyo[:, 1:2]
tokyo_wthr = tokyo[:, 14:15]
In [138]:
tokyo_elec
Out[138]:
array([[2555],
       [2433],
       [2393],
       ...,
       [3619],
       [3463],
       [3230]], dtype=object)
In [139]:
tokyo_wthr
Out[139]:
array([[13.3],
       [13.8],
       [13.0],
       ...,
       [13.0],
       [12.8],
       [12.2]], dtype=object)
In [140]:
# SVMを用いるために必要なライブラリーの呼び出し
#import sklearn.cross_validation
from sklearn.model_selection import cross_val_score
import sklearn.svm
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)

# 交差検定実施
for train, test in kf.split(tokyo_wthr):
    x_train = tokyo_wthr[train]
    x_test = tokyo_wthr[test]
    y_train = tokyo_elec[train]
    y_test = tokyo_elec[test]

    # -- SVR --
    model = sklearn.svm.SVR(gamma="scale")
    y_train = y_train.flatten()
    y_test = y_test.flatten()
    model.fit(x_train, y_train)
    print ("SVR: Training Score = %f, Testing(Validate) Score = %f" %
           (model.score(x_train, y_train), model.score(x_test, y_test)))
SVR: Training Score = 0.165240, Testing(Validate) Score = -0.655732
SVR: Training Score = 0.145986, Testing(Validate) Score = 0.118063
SVR: Training Score = 0.153808, Testing(Validate) Score = 0.185450
SVR: Training Score = 0.235393, Testing(Validate) Score = -0.030769
SVR: Training Score = 0.205353, Testing(Validate) Score = -0.368313
In [141]:
#予測モデルの作成
model = sklearn.svm.SVR(gamma="scale")
y_train = y_train.flatten()
y_test = y_test.flatten()

model.fit(x_train, y_train)
Out[141]:
SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
In [142]:
#最小値、最大値は、それぞれ気温の最高・最低を参照する。刻み込み幅は0.01
import numpy as np
px = np.arange(tokyo_wthr.min(), tokyo_wthr.max(), 0.01)[:, np.newaxis]
py = model.predict(px)
In [143]:
ppy = pd.DataFrame(px, py)
ppy.rename(columns = {0:'temp'}, inplace=True)
In [144]:
#気温と電力需要のグラフにSVMで作成された予測モデル
pred  = [plotly.graph_objs.Scatter(x=ppy["temp"], y=ppy.index, mode = "lines", marker=dict(color="rgba(30,144,255, 0.8)"))]

layout = plotly.graph_objs.Layout(
    title="SVMを用いた需要予測モデル",
    legend={"x":0.8, "y":0.1},
    xaxis={"title":"気温(℃)"},
    yaxis={"title":"電力需要(万kw)"}
)
fig = plotly.graph_objs.Figure(data=pred, layout=layout)
plotly.offline.iplot(fig, show_link=False)
In [145]:
#気温と電力需要のグラフにSVMで作成された予測モデルを重ね合わせる

cor_dmnd = plotly.graph_objs.Scatter(x=dat["temp"], y=dat["tky_dmnd"], mode = "markers",marker=dict(color="rgba(64,64,64,0.6)"), name="実測値散布")
pred = plotly.graph_objs.Scatter(x=ppy["temp"], y=ppy.index, mode = "lines", marker=dict(color="rgba(30,144,255,0.5)"), name="需要予測")

el = [cor_dmnd, pred]
layout = plotly.graph_objs.Layout(
    title="SVMを用いた需要予測モデル",
    legend={"x":1.0, "y":1.0},
    xaxis={"title":"気温(℃)"},
    yaxis={"title":"電力需要(万kw)"}
)
fig = plotly.graph_objs.Figure(data=el, layout=layout)
plotly.offline.iplot(fig, show_link=False)
In [ ]: